(II) Data Analysis
# Read the gapminder CSV from its remote URL into `gapminder`.
gapminder_url <- "https://bioconnector.github.io/workshops/data/gapminder.csv"
gapminder <- gapminder_url %>% read_csv()
# Render a data frame as a centre-aligned, Bootstrap-styled kable table.
#
# df: the data frame to display.
# Returns the styled kable object.
render_df <- function(df) {
  n_rows <- nrow(df)
  bootstrap_styles <- c("striped", "hover", "responsive", "condensed")

  centred <- kable(df, align = "c")
  styled <- kable_styling(
    centred,
    bootstrap_options = bootstrap_styles,
    fixed_thead = TRUE,
    full_width = FALSE
  )

  # The index range starts at 0 so the CSS is applied to row 0 as well as to
  # every body row (kableExtra addresses the header as row 0).
  row_spec(styled, 0:n_rows, extra_css = "vertical-align: middle;")
}
1. Number of countries per continent
- How many unique countries are represented per continent?
# Number of distinct countries recorded for each continent.
df_1 <- gapminder %>%
  select(Continent = continent, country) %>%
  group_by(Continent) %>%
  summarise(Country = n_distinct(country))
render_df(df_1)
|
Continent
|
Country
|
|
Africa
|
52
|
|
Americas
|
25
|
|
Asia
|
33
|
|
Europe
|
30
|
|
Oceania
|
2
|
# Bar chart of the per-continent country counts, passed through ggplotly().
p_1 <- ggplot(df_1, aes(x = Continent, y = Country)) +
  geom_bar(stat = "identity", fill = "cornflowerblue", width = 0.5) +
  ggtitle("Country Number of Each Continent") +
  theme(plot.title = element_text(size = 20, hjust = 0.5))
ggplotly(p_1)
2. Average life expectancy
- According to the data available, what was the average Life Expectancy across each continent from 1952 to 2007?
# Mean life expectancy for every continent/year combination.
df_2 <- gapminder %>%
  select(continent, year, lifeExp) %>%
  group_by(continent, year) %>%
  summarise(`Average Life Expectancy` = mean(lifeExp)) %>%
  rename(Continent = continent, Year = year)

# One coloured series (points joined by a line) per continent.
p_2 <- ggplot(
  df_2,
  aes(x = Year, y = `Average Life Expectancy`, color = Continent)
) +
  geom_point() +
  geom_line() +
  ggtitle("Average Life Expectancy per Continent") +
  ylab("Life Expectancy (Years)") +
  theme(plot.title = element_text(size = 20, hjust = 0.5))
ggplotly(p_2)
- What was the Life Expectancy for every country in the Americas?
# Life expectancy per country and year, restricted to the "Americas" rows.
df_3 <- gapminder %>%
  filter(continent == "Americas") %>%
  select(country, year, lifeExp) %>%
  group_by(country, year) %>%
  summarise(`Average Life Expectancy` = mean(lifeExp)) %>%
  rename(Country = country, Year = year)

# One coloured series (points joined by a line) per country.
p_3 <- ggplot(
  df_3,
  aes(x = Year, y = `Average Life Expectancy`, color = Country)
) +
  geom_point() +
  geom_line() +
  ggtitle("Average Life Expectancy in Americas") +
  ylab("Life Expectancy (Years)") +
  theme(plot.title = element_text(size = 20, hjust = 0.5))
ggplotly(p_3)
- Which countries had the longest average Life Expectancy in the world?
# The five countries with the highest mean life expectancy across all rows.
df_4 <- gapminder %>%
  select(country, lifeExp) %>%
  group_by(country) %>%
  summarise(`Average Life Expectancy` = mean(lifeExp)) %>%
  rename(Country = country) %>%
  arrange(desc(`Average Life Expectancy`)) %>%
  slice(1:5)
render_df(df_4)
|
Country
|
Average Life Expectancy
|
|
Iceland
|
76.51142
|
|
Sweden
|
76.17700
|
|
Norway
|
75.84300
|
|
Netherlands
|
75.64850
|
|
Switzerland
|
75.56508
|
- Which countries had the shortest average Life Expectancy in the world?
# The five countries with the lowest mean life expectancy across all rows.
df_5 <- gapminder %>%
  select(country, lifeExp) %>%
  group_by(country) %>%
  summarise(`Average Life Expectancy` = mean(lifeExp)) %>%
  rename(Country = country) %>%
  arrange(`Average Life Expectancy`) %>%
  slice(1:5)
render_df(df_5)
|
Country
|
Average Life Expectancy
|
|
Sierra Leone
|
36.76917
|
|
Afghanistan
|
37.47883
|
|
Angola
|
37.88350
|
|
Guinea-Bissau
|
39.21025
|
|
Mozambique
|
40.37950
|
3. Average population
- According to the data available, what was the average Population across each continent from 1952 to 2007?
# Mean population for every continent/year combination.
df_6 <- gapminder %>%
  select(continent, year, pop) %>%
  group_by(continent, year) %>%
  summarise(`Average Population` = mean(pop)) %>%
  rename(Continent = continent, Year = year)

# Y-axis ticks every 10 million from 0 to 120 million. The labels are derived
# from the breaks so the two vectors cannot drift apart; the zero tick is
# labelled "0" and every other tick is labelled in millions ("10M", "20M", ...).
population_breaks <- seq(from = 0, to = 120, by = 10) * 10^6
population_labels <- c("0", paste0(population_breaks[-1] / 10^6, "M"))

# One coloured series (points joined by a line) per continent.
p_6 <- ggplot(
  df_6,
  aes(x = Year, y = `Average Population`, color = Continent)
) +
  geom_point() +
  geom_line() +
  ggtitle("Average Population per Continent") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  scale_y_continuous(labels = population_labels, breaks = population_breaks)
ggplotly(p_6)
- What was the population of every country in the Americas?
# Population per country and year, restricted to the "Americas" rows.
df_7 <- gapminder %>%
  filter(continent == "Americas") %>%
  select(country, year, pop) %>%
  group_by(country, year) %>%
  summarise(`Average Population` = mean(pop)) %>%
  rename(Country = country, Year = year)

# Y-axis ticks every 30 million from 0 to 300 million. The labels are derived
# from the breaks so the two vectors cannot drift apart; the zero tick is
# labelled "0" and every other tick is labelled in millions ("30M", "60M", ...).
population_breaks <- seq(from = 0, to = 300, by = 30) * 10^6
population_labels <- c("0", paste0(population_breaks[-1] / 10^6, "M"))

# One coloured series (points joined by a line) per country.
p_7 <- ggplot(
  df_7,
  aes(x = Year, y = `Average Population`, color = Country)
) +
  geom_point() +
  geom_line() +
  ggtitle("Average Population in Americas") +
  theme(plot.title = element_text(size = 20, hjust = 0.5)) +
  scale_y_continuous(labels = population_labels, breaks = population_breaks)
ggplotly(p_7)
4. Average GDP per Capita
- According to the data available, what was the average GDP per Capita across each continent from 1952 to 2007?
# Mean GDP per capita for every continent/year combination, in long format
# with columns Year, continent and gdpPercap.
#
# The years come from the data itself rather than from a hard-coded
# 1952-2007 sequence bound on with cbind(), so the Year column cannot become
# misaligned with the averages. Summarising directly in long format also
# avoids the wide-then-gather() round trip (gather() is superseded in tidyr).
df_8 <- gapminder %>%
  group_by(continent, year) %>%
  summarise(gdpPercap = mean(gdpPercap)) %>%
  ungroup() %>%
  select(Year = year, continent, gdpPercap)

# One coloured series (points joined by a line) per continent.
p3 <- ggplot(data = df_8, aes(x = Year, y = gdpPercap, color = continent)) +
  geom_point() +
  geom_line() +
  ggtitle("Average GDP per Capita") +
  xlab("Year") +
  ylab("GDP per Capita") +
  theme(plot.title = element_text(size = 20, hjust = 0.5))
p3

- What was the GDP per Capita for every country in the Americas?
# GDP per capita over time for every country in the "Americas" rows.
df_9 <- gapminder %>% filter(continent == "Americas")

# tapply() yields a country x year matrix; transposing it gives one row per
# year and one column per country, which is the layout matplot() draws as one
# line per column. The row names are the years found in the data.
df_9 <- t(tapply(df_9$gdpPercap, list(df_9$country, df_9$year), mean))

# Colours are assigned explicitly (cycling through 1:6, matplot's default
# palette) so the legend below can reuse exactly the colour of each line.
df_9_cols <- rep_len(1:6, ncol(df_9))
matplot(
  as.numeric(rownames(df_9)),
  df_9,
  type = "l",
  lty = 1,
  col = df_9_cols,
  xlab = "Years",
  ylab = "GDP Per Capita"
)

# Label the two countries with the highest value in the last year of the
# data. Names and colours are looked up from the matrix rather than
# hard-coded, so each legend entry matches the line it describes.
df_9_top <- order(df_9[nrow(df_9), ], decreasing = TRUE)[1:2]
legend(
  "topleft",
  legend = paste0("Top ", 1:2, ": ", colnames(df_9)[df_9_top]),
  lty = 1,
  col = df_9_cols[df_9_top]
)

(III) Interactive Web Application
library(shiny)
# Page layout: a sidebar with two drop-downs (country and indicator) and a
# main panel holding the plot output "figure" and the table output "data".
UI <- fluidPage(
  titlePanel("World Facts"),
  sidebarLayout(
    sidebarPanel(
      selectInput(
        inputId = "select",
        label = "Choose a country",
        choices = unique(gapminder$country)
      ),
      selectInput(
        inputId = "object",
        label = "Choose from the following",
        choices = c("Life Expectancy", "Population", "GDP per Capita")
      )
    ),
    mainPanel(
      plotOutput(outputId = "figure"),
      tableOutput(outputId = "data")
    )
  )
)
# Shiny server function: plots the chosen indicator over time for the chosen
# country and lists the same values in a table.
#
# input:  reads input$select (country name) and input$object (indicator label).
# output: assigns output$figure (plot) and output$data (table).
SERVER <- function(input, output) {
  # Maps each indicator label offered by the UI to the gapminder column that
  # holds it and to the column header used in the table.
  indicators <- list(
    "Life Expectancy" = list(column = "lifeExp", header = "Life Expectancy"),
    "Population" = list(column = "pop", header = "pop"),
    "GDP per Capita" = list(column = "gdpPercap", header = "GDP per Capita")
  )

  # Rows of gapminder for the selected country. Defined once at server level
  # and shared by both outputs, instead of creating a new reactive() inside
  # every render call.
  country_data <- reactive({
    subset(gapminder, gapminder$country == input$select)
  })

  output$figure <- renderPlot({
    indicator <- indicators[[input$object]]
    if (is.null(indicator)) {
      # Unknown indicator label: draw nothing.
      return(NULL)
    }
    dat <- country_data()
    values <- dat[[indicator$column]]
    plot(
      dat$year,
      values,
      xlim = c(1950, 2010),
      xlab = "Year",
      ylab = input$object,
      lty = 2,
      type = "l",
      # paste0() gives a single-line title; passing c(label, country) would
      # be drawn as two separate title lines.
      main = paste0(input$object, " of ", input$select)
    )
    points(dat$year, values, pch = 19, col = 1)
  })

  output$data <- renderTable(
    {
      indicator <- indicators[[input$object]]
      if (is.null(indicator)) {
        # Unknown indicator label: show no table.
        return(NULL)
      }
      dat <- country_data()
      # Build a real data frame with check.names = FALSE so headers that
      # contain spaces are shown as written instead of being converted to
      # syntactic names (e.g. "Life.Expectancy"). The year is stored as an
      # integer so it is shown as a whole number.
      table_data <- data.frame(
        year = as.integer(dat$year),
        value = dat[[indicator$column]],
        check.names = FALSE
      )
      names(table_data)[2] <- indicator$header
      table_data
    },
    colnames = TRUE
  )
}
# Build the app object from the UI definition and the server function above.
shinyApp(
  ui = UI,
  server = SERVER
)